I. Introduction & Motivation

II. Setup

我把这个部分的标题都删掉了,因为这些setup没必要展示在final deliverable里面。

In this section, we loaded necessary libraries, created plot theme options and map theme options, and identified functions for quintile breaks.

# 1. load Libraries
knitr::opts_chunk$set(echo = TRUE)
options(scipen=999)


library(tidyverse)
library(tidycensus)
library(sf)
library(gridExtra)
library(grid)
library(knitr)
library(kableExtra)
library(rmarkdown)

mapTheme <- function(base_size = 12) {
  theme(
    text = element_text( color = "black"),
    plot.title = element_text(size = 16,colour = "black"),
    plot.subtitle=element_text(face="italic"),
    plot.caption=element_text(hjust=0),
    axis.ticks = element_blank(),
    panel.background = element_blank(),axis.title = element_blank(),
    axis.text = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(colour = "black", fill=NA, size=2),
    strip.text.x = element_text(size = 14))
}

plotTheme <- function(base_size = 12) {
  theme(
    text = element_text( color = "black"),
    plot.title = element_text(size = 16,colour = "black"),
    plot.subtitle = element_text(face="italic"),
    plot.caption = element_text(hjust=0),
    axis.ticks = element_blank(),
    panel.background = element_blank(),
    panel.grid.major = element_line("grey80", size = 0.1),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(colour = "black", fill=NA, size=2),
    strip.background = element_rect(fill = "grey80", color = "white"),
    strip.text = element_text(size=12),
    axis.title = element_text(size=12),
    axis.text = element_text(size=10),
    plot.background = element_blank(),
    legend.background = element_blank(),
    legend.title = element_text(colour = "black", face = "italic"),
    legend.text = element_text(colour = "black", face = "italic"),
    strip.text.x = element_text(size = 14)
  )
}

# 3. Identify functions
qBr <- function(df, variable, rnd) {
  if (missing(rnd)) {
    as.character(quantile(round(df[[variable]],0),
                          c(.01,.2,.4,.6,.8), na.rm=T))
  } else if (rnd == FALSE | rnd == F) {
    as.character(formatC(quantile(df[[variable]]), digits = 3),
                 c(.01,.2,.4,.6,.8), na.rm=T)
  }
}

q5 <- function(variable) {as.factor(ntile(variable, 5))}


multipleRingBuffer <- function(inputPolygon, maxDistance, interval) 
{
  #create a list of distances that we'll iterate through to create each ring
  distances <- seq(0, maxDistance, interval)
  #we'll start with the second value in that list - the first is '0'
  distancesCounter <- 2
  #total number of rings we're going to create
  numberOfRings <- floor(maxDistance / interval)
  #a counter of number of rings
  numberOfRingsCounter <- 1
  #initialize an otuput data frame (that is not an sf)
  allRings <- data.frame()
  
  #while number of rings  counteris less than the specified nubmer of rings
  while (numberOfRingsCounter <= numberOfRings) 
  {
    #if we're interested in a negative buffer and this is the first buffer
    #(ie. not distance = '0' in the distances list)
    if(distances[distancesCounter] < 0 & distancesCounter == 2)
    {
      #buffer the input by the first distance
      buffer1 <- st_buffer(inputPolygon, distances[distancesCounter])
      #different that buffer from the input polygon to get the first ring
      buffer1_ <- st_difference(inputPolygon, buffer1)
      #cast this sf as a polygon geometry type
      thisRing <- st_cast(buffer1_, "POLYGON")
      #take the last column which is 'geometry'
      thisRing <- as.data.frame(thisRing[,ncol(thisRing)])
      #add a new field, 'distance' so we know how far the distance is for a give ring
      thisRing$distance <- distances[distancesCounter]
    }
    
    
    #otherwise, if this is the second or more ring (and a negative buffer)
    else if(distances[distancesCounter] < 0 & distancesCounter > 2) 
    {
      #buffer by a specific distance
      buffer1 <- st_buffer(inputPolygon, distances[distancesCounter])
      #create the next smallest buffer
      buffer2 <- st_buffer(inputPolygon, distances[distancesCounter-1])
      #This can then be used to difference out a buffer running from 660 to 1320
      #This works because differencing 1320ft by 660ft = a buffer between 660 & 1320.
      #bc the area after 660ft in buffer2 = NA.
      thisRing <- st_difference(buffer2,buffer1)
      #cast as apolygon
      thisRing <- st_cast(thisRing, "POLYGON")
      #get the last field
      thisRing <- as.data.frame(thisRing$geometry)
      #create the distance field
      thisRing$distance <- distances[distancesCounter]
    }
    
    #Otherwise, if its a positive buffer
    else 
    {
      #Create a positive buffer
      buffer1 <- st_buffer(inputPolygon, distances[distancesCounter])
      #create a positive buffer that is one distance smaller. So if its the first buffer
      #distance, buffer1_ will = 0. 
      buffer1_ <- st_buffer(inputPolygon, distances[distancesCounter-1])
      #difference the two buffers
      thisRing <- st_difference(buffer1,buffer1_)
      #cast as a polygon
      thisRing <- st_cast(thisRing, "POLYGON")
      #geometry column as a data frame
      thisRing <- as.data.frame(thisRing[,ncol(thisRing)])
      #add teh distance
      thisRing$distance <- distances[distancesCounter]
    }  
    
    #rbind this ring to the rest of the rings
    allRings <- rbind(allRings, thisRing)
    #iterate the distance counter
    distancesCounter <- distancesCounter + 1
    #iterate the number of rings counter
    numberOfRingsCounter <- numberOfRingsCounter + 1
  }
  
  #convert the allRings data frame to an sf data frame
  allRings <- st_as_sf(allRings)
}

palette5 <- c("#CCCCFF","#CC99FF","#9966CC","#663399","#330066")

# Load API
census_api_key("a7edab3d7c3df571998caab5a3cc12a4ec8d8b61" , overwrite  = TRUE)

III. Data Manipulation and Visualization

1. Data Wrangling

这里介绍dataset

1.1 Fetch Census Data

这里介绍选择什么year census data 以及什么变量

# ---- 2009 Census Data -----
tracts09 <-  
  get_acs(geography = "tract", variables = c("B25026_001E","B02001_002E","B15001_050E",
                                             "B15001_009E","B19013_001E","B25058_001E",
                                             "B06012_002E"), 
          year=2009, state= 48, county= 113, geometry=T) %>% 
  st_transform('EPSG:32138')


tracts09 <- 
  tracts09 %>%
  dplyr::select( -NAME, -moe) %>%
  spread(variable, estimate) %>%
  dplyr::select(-geometry) %>%
  rename(TotalPop = B25026_001, 
         Whites = B02001_002,
         FemaleBachelors = B15001_050, 
         MaleBachelors = B15001_009,
         MedHHInc = B19013_001, 
         MedRent = B25058_001,
         TotalPoverty = B06012_002)


tracts09 <- 
  tracts09 %>%
  mutate(pctWhite = ifelse(TotalPop > 0, Whites / TotalPop, 0),
         pctBachelors = ifelse(TotalPop > 0, ((FemaleBachelors + MaleBachelors) / TotalPop * 100), 0),
         pctPoverty = ifelse(TotalPop > 0, TotalPoverty / TotalPop, 0),
         year = "2009") %>%
  dplyr::select(-Whites,-FemaleBachelors,-MaleBachelors,-TotalPoverty)

# ---- 2017 Census Data -----

tracts17 <- 
  get_acs(geography = "tract", variables = c("B25026_001E","B02001_002E","B15001_050E",
                                             "B15001_009E","B19013_001E","B25058_001E",
                                             "B06012_002E"), 
          year=2017, state= 48, county= 113, geometry=T, output="wide") %>%
  st_transform('EPSG:32138') %>%
  rename(TotalPop = B25026_001E, 
         Whites = B02001_002E,
         FemaleBachelors = B15001_050E, 
         MaleBachelors = B15001_009E,
         MedHHInc = B19013_001E, 
         MedRent = B25058_001E,
         TotalPoverty = B06012_002E) %>%
  dplyr::select(-NAME, -starts_with("B")) %>% #-starts_with("B") awesome!
  mutate(pctWhite = ifelse(TotalPop > 0, Whites / TotalPop,0),
         pctBachelors = ifelse(TotalPop > 0, ((FemaleBachelors + MaleBachelors) / TotalPop * 100),0),
         pctPoverty = ifelse(TotalPop > 0, TotalPoverty / TotalPop, 0),
         year = "2017") %>%
  dplyr::select(-Whites, -FemaleBachelors, -MaleBachelors, -TotalPoverty) 

# --- Combining 09 and 17 data ----

allTracts <- rbind(tracts09,tracts17)

1.2 Load Dallas Transit Data

ggplot() + 
  geom_sf(data= Dallas) +
  geom_sf(data=D_Stops, 
          aes(color = 'orange'), 
          show.legend = "point", size= 2) +
  scale_colour_manual(values = c("orange"),
                      labels = 'Stations',
                      name = ' ') +
  labs(title="Dallas Transit Stops", 
       subtitle=" ", 
       caption="Figure 1.1") + 
  plotTheme() +
  theme(plot.title = element_text(color = "darkred", size=15, face="bold"))

1.3 Identifying TOD Tracts

D_Buffers <- 
  rbind(
    st_buffer(D_Stops, 2640) %>%
      mutate(Legend = "Buffer") %>%
      dplyr::select(Legend),
    st_union(st_buffer(D_Stops, 2640)) %>%
      st_sf() %>%
      mutate(Legend = "Unioned Buffer"))
buffer <- filter(D_Buffers, Legend=="Unioned Buffer")

selectCentroids <-
  st_centroid(tracts09)[buffer,] %>%
  st_drop_geometry() %>%
  left_join(dplyr::select(tracts09, GEOID)) %>%
  st_sf() %>%
  dplyr::select(TotalPop) %>%
  mutate(Selection_Type = "Select by Centroids")

head(selectCentroids,5)

# Census Data in Group by TODs
allTracts.group <- 
  rbind(
    st_centroid(allTracts)[buffer,] %>%
      st_drop_geometry() %>%
      left_join(allTracts) %>%
      st_sf() %>%
      mutate(TOD = "TOD"),
    st_centroid(allTracts)[buffer, op = st_disjoint] %>%
      st_drop_geometry() %>%
      left_join(allTracts) %>%
      st_sf() %>%
      mutate(TOD = "Non-TOD")) %>%
  mutate(MedRent.inf = ifelse(year == "2009", MedRent * 1.14, MedRent))  # inflation

# TOD and Non-TOD between 2009 and 2017
ggplot(allTracts.group)+
  geom_sf(data = st_union(tracts09))+
  geom_sf(aes(fill = TOD)) +
  labs(title = "Time/Space Groups",
       subtitle = '',
       caption = 'Figure 1') +
  facet_wrap(~year)+
  mapTheme() + 
  theme(plot.title = element_text(color = "darkred", size=15, face="bold")) 

2. Changes in Census Variables Over Time and Space

2.1 Median Rent Comparison between 2009 and 2017

ggplot(allTracts.group)+
  geom_sf(data = st_union(tracts09))+
  geom_sf(aes(fill = q5(MedRent.inf))) +
  scale_fill_manual(values = palette5,
                    labels = qBr(allTracts.group, "MedRent.inf"),
                    name = "Rent\n(Quintile Breaks)") +
  labs(title = "Median Rent (w/inflation) 2009-2017", 
       subtitle = "Real dollars \n",
       caption = 'Figure 2.1') +
  facet_wrap(c(~year, ~TOD))+
  mapTheme() + 
  theme(plot.title = element_text(color = "darkred", size=15, face="bold")) 

2.2 Percentage of White Group between 2009 and 2017

ggplot(allTracts.group)+
  geom_sf(data = st_union(tracts09)) +
  geom_sf(aes(fill = q5(pctWhite))) +
  scale_fill_manual(values = palette5,
                    labels = qBr(allTracts.group, "pctWhite"),
                    name = "Percentage of White Group\n(Quintile Breaks)") +
  labs(title = "Percentage of White Group 2009-2017",
       subtitle = '',
       caption = 'Figure 2.2') +
  facet_wrap(c(~year,~TOD))+
  mapTheme() + 
  theme(plot.title = element_text(color = "darkred", size=15, face="bold")) 

2.3 Bachelor Degree Comparison between 2009 and 2017

ggplot(allTracts.group)+
  geom_sf(data = st_union(tracts09))+
  geom_sf(aes(fill = q5(pctBachelors))) +
  scale_fill_manual(values = palette5,
                    labels = qBr(allTracts.group, "pctBachelors"),
                    name = "Percentage of Bachelar Degree (%)\n(Quintile Breaks)") +
  labs(title = "Bachelor Degree Comparison between 2009-2017",
       subtitle = '',
       caption = 'Figure 2.3') +
  facet_wrap(c(~year,~TOD))+
  mapTheme() + 
  theme(plot.title = element_text(color = "darkred", size=15, face="bold")) 

2.4 Median Household Income Comparison between 2009 and 2017

ggplot(allTracts.group)+
  geom_sf(data = st_union(tracts09))+
  geom_sf(aes(fill = q5(MedHHInc))) +
  scale_fill_manual(values = palette5,
                    labels = qBr(allTracts.group, "MedHHInc"),
                    name = "Median Household Income\n(Quintile Breaks)") +
  labs(title = "Median Household Income 2009-2017",
       subtitle = 'Real dollars \n',
       caption = 'Figure 2.4') +
  facet_wrap(c(~year,~TOD))+
  mapTheme() + 
  theme(plot.title = element_text(color = "darkred", size=15, face="bold")) 

3. Bar Plot Comparisons

We can examine these same variables over time using a barplot to more precisely compare the values within and outside of transit-oriented development tracts. Rent was and household income were adjusted for inflation.

Highlight: Percentage of people with a bachelor’s degree and % of white people increased in transit areas between 2009-2017 but stayed at similar levels outside of these areas.

Highlight: Even when adjusting for inflation, household income and rent increased everywhere, regardless of transit-status, and percentage of people in poverty decreased everywhere.

allTracts.Summary <- 
  st_drop_geometry(allTracts.group) %>%
  group_by(year, TOD) %>%
  summarize(Rent = mean(MedRent, na.rm = T),
            Population = mean(TotalPop, na.rm = T),
            `White Proprotion` = mean(pctWhite, na.rm = T),
            `Bachelor Degree` = mean(pctBachelors, na.rm = T),
            Income = mean(MedHHInc, na.rm = T))

allTracts.Summary %>%
  gather(Variable, Value, -year, -TOD) %>%
  ggplot(aes(year, Value, fill = TOD)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_wrap(~Variable, scales = "free", ncol = 3) +
  scale_fill_manual(values = c("#debae4", "#800694")) +
  labs(title = "Indicator differences across time and space \n",
       caption = 'Figure 3.1') +
  plotTheme() + 
  theme(legend.position="bottom") +
  theme(plot.title = element_text(color = "darkred", size=15, face="bold")) 

4. Table Comparisons

这里要具体谈一下数字,因为可以计算。 We can compare these same values using a tables as well. While the visual change isn’t as clear, we are able to read the exact values. For example, the percentage of 18-24 year old residents with a bachelors degree increased from 1.8% to 2.5% in TOD areas but decreased from 1.3% to 1.2% in other areas.

We can also calculate the exact change in percent increase and see if things are increasing faster within TOD areas.

Highlight: With a 43% rise, rent is increasing at a greater rate in transit-oriented development vs non-TOD development (28% increase).

Highlight: Population is also increasing at faster rate within transit-oriented areas vs non-transit-oriented areas (25% increase vs 17% increase, respectively)

allTracts.Summary %>%
  unite(year.TOD, year, TOD, sep = ": ", remove = T) %>%
  gather(Variable, Value, -year.TOD) %>%
  mutate(Value = round(Value, 2)) %>%
  spread(year.TOD, Value) %>%
  kable() %>%
  kable_styling() %>%
  footnote(general_title = "\n",
           general = "Table 4.1")
Variable 2009: Non-TOD 2009: TOD 2017: Non-TOD 2017: TOD
Bachelor Degree 0.58 1.13 0.68 1.54
Income 57573.74 47845.36 65220.27 59105.64
Population 5101.41 4479.06 5186.17 4255.57
Rent 756.44 706.39 947.34 921.06
White Proprotion 0.61 0.61 0.61 0.66

Table 4.1

5. Graduated Symbol Maps

We examined population and rent within 0.5 miles of each transit stop. We used an areal interpolation to estimate the population within 0.5 miles of each transit stop.

5.1 Population within 0.5 mile of each transit station

popByStation <-
  join_data %>% 
  group_by(STA_NAME) %>%
  summarize(`Total Population` = sum(TotalPop)) %>%
  dplyr::select(STA_NAME, `Total Population`) %>%
  st_drop_geometry() %>%
  left_join(D_Stops)

popByStation_sf <- popByStation %>%
  st_sf()

ggplot() + 
  geom_sf(data = Dallas, fill = 'lightgrey') +
  geom_sf(data = popByStation_sf, aes(size = `Total Population`), color = '#5e9bb2') +
  labs(title = "Graduated Symbol Map I: \nPopulation within 0.5 Mile of Each Transit Station" ,
       subtitle = '',
       caption = "Figure 5.1", 
       fill = 'Total Population') +
  plotTheme() + 
  theme(legend.position="right") + 
  theme(plot.title = element_text(color = "darkred", size=15, face="bold")) 

5.2 Median rent within 0.5 mile of each transit station

rentByStation <-
  join_data %>%
  group_by(STA_NAME) %>%
  summarize(`Median Rent` = median(na.omit(MedRent))) %>%
  dplyr::select(STA_NAME, `Median Rent`)  %>% 
  st_drop_geometry() %>%
  left_join(D_Stops)

rentByStation_sf <- rentByStation %>%
  st_sf()

ggplot() + 
  geom_sf(data = Dallas, fill = 'lightgrey') +
  geom_sf(data = rentByStation_sf, aes(size = `Median Rent`), color = '#c2a7ef')  +  
  labs(title = "Graduated Symbol Map II: \nMedian Rent within 0.5 Mile of Each Transit Station",
       subtitle = '',
       caption = "Figure 5.2") +
  plotTheme() + 
  theme(legend.position="right") + 
  theme(plot.title = element_text(color = "darkred", size=15, face="bold"))

6. Mean Rent as a function of distance to subway stations

Our next analysis looked at how average rent changes as a resident gets further from the nearest transit station. For each census tract, we calculated the max distance a tract is from a transit zone.

Highlight: Even when adjusting for inflation, rent has increased between 2009 and 2017.

Highlight: In 2009 and 2017, rent is at its lowest immediately near transit stations. There is a small and inconsistent rise in pricing as you move farther from transit. This may suggest that housing within transit-oriented development areas may be more accessible to more people.

allTracts.rings <-
  st_join(st_centroid(dplyr::select(allTracts, GEOID, year)), 
          multipleRingBuffer(st_union(D_Stops), 47520, 2640)) %>%
  st_drop_geometry() %>%
  left_join(dplyr::select(allTracts, GEOID, MedRent, year), 
            by=c("GEOID"="GEOID", "year"="year")) %>%
  st_sf() %>%
  mutate(distance = distance / 5280) 

allTracts.rings <- na.omit(allTracts.rings)

mean_RENT <- allTracts.rings %>%
  group_by(year, distance) %>%
  summarize(mean_RENT = mean(MedRent)) 

ggplot(data = mean_RENT) +
  geom_line(aes(x = distance, y = mean_RENT, col = year), size = 2) + 
  geom_point(aes(x = distance, y = mean_RENT, col = year) , size = 4) + 
  labs(title = " Rent as a function of distance to subway stations",
       subtitle = '',
       caption = 'Figure 6.1') +
  scale_color_manual(values = c('#5f829e','#fab18e'),
                     name = 'Year') +
  theme(legend.position = "right") +
  xlab("Distance to Stations, Miles") + 
  ylab("Average Rents") +
  theme_classic() +
  theme(plot.title = element_text(color = "darkred", size=15, face="bold")) 

7. Crime Rate, Transit Access and Rents

Finally, we wanted to examine the relationship between TOD and crime and population.

# 1. Load Crime-Burglary Data
crime_dat <- read.csv('Dallas_Crime_Data.CSV')

crime_dat_point <- st_as_sf(crime_dat, coords = c("Longitude", "Latitude"), crs = 4326, agr = "constant") %>%
  select(Type, geometry)

# 2. Plot Relation between Burglary and Rents
ggplot(allTracts.group) + 
geom_sf(data = st_union(tracts09))+
  geom_sf(aes(fill = q5(MedRent.inf))) +
  geom_sf(data = crime_dat_point, 
          aes(color = Type),
          show.legend = 'point',size = 1) +
  scale_fill_manual(values = palette5,
                    labels = qBr(allTracts.group, "MedRent.inf"),
                    name = "Rent\n(Quintile Breaks)")  +
  labs(title = "Relation between Burglary and Rents \n",
       caption = 'Figure 7.1') +
  theme(legend.position="right") +
  guides(fill=guide_legend(ncol=2)) +
  theme(plot.title = element_text(color = "darkred", size=15, face="bold")) 

# 3. Plot Relation between Burglary and Transit
ggplot(allTracts.group)+
  geom_sf(data = st_union(tracts09))+
  geom_sf(aes(fill = TOD)) +
  scale_fill_manual(values = palette5) +
            
  geom_sf(data = crime_dat_point, 
          aes(color = Type),
          show.legend = 'point',size = 1) +
  labs(title = "Relation between Burglary and Transit \n",
       caption = 'Figure 7.2') +
  theme(legend.position="right") +
  mapTheme() + 
  theme(plot.title = element_text(color = "darkred", size=15, face="bold")) 

IV. Conclusion and Recommandation

To sum up, this analysis project has demonstrated the complex relationships that TOD can have on an evolving city.